Assignment: RNN and ConvNets (60 points)¶

The data file ”data.csv” contains 3 time series x1, x2, and y along with the corresponding date column. The data ranges from beginning of 2019 to the end of Feb. of 2020. The objective of this problem is to make predictions for y for March 1st and 2nd in 2020.

Kai Hsin Hung | Harshitha Mallappa

1. Explore regular feedforward neural network models for this problem. (10points)¶

In [1]:
import pandas as pd
import numpy as np
In [2]:
data = pd.read_csv('timeseriesData.csv')
data.head()
Out[2]:
Date x1 x2 y
0 1/1/19 51.0 5.550000 65.58
1 1/2/19 51.0 8.950000 65.35
2 1/3/19 43.0 7.033333 69.80
3 1/4/19 43.0 7.033333 69.76
4 1/5/19 53.0 4.950000 70.48
In [3]:
data.info()
missing_val = data.isna().sum()
print(f'Missing val:\n{missing_val}')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 427 entries, 0 to 426
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    425 non-null    object 
 1   x1      387 non-null    float64
 2   x2      387 non-null    float64
 3   y       387 non-null    float64
dtypes: float64(3), object(1)
memory usage: 13.5+ KB
Missing val:
Date     2
x1      40
x2      40
y       40
dtype: int64
In [4]:
# data imputation for Na values
for col in ['x1', 'x2', 'y', 'Date']:
    data[col] = data[col].ffill()
missing_val = data.isna().sum()
print(f'Missing val:\n{missing_val}')

# Change Date dtype ot Date and sort it
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date')

feature = ['x1', 'x2']
target = 'y'
Missing val:
Date    0
x1      0
x2      0
y       0
dtype: int64
/var/folders/f8/_j25ckdn20vbvtb_vyn8bz0r0000gn/T/ipykernel_14288/2962500903.py:8: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
  data['Date'] = pd.to_datetime(data['Date'])
In [5]:
data
Out[5]:
Date x1 x2 y
0 2019-01-01 51.0 5.550000 65.58
1 2019-01-02 51.0 8.950000 65.35
2 2019-01-03 43.0 7.033333 69.80
3 2019-01-04 43.0 7.033333 69.76
4 2019-01-05 53.0 4.950000 70.48
... ... ... ... ...
422 2020-02-27 19.0 5.483333 83.62
423 2020-02-28 19.0 5.483333 83.62
424 2020-02-29 65.0 6.183333 68.53
425 2020-02-29 65.0 6.183333 68.53
426 2020-02-29 65.0 6.183333 68.53

427 rows × 4 columns

In [6]:
# split the data set, portion: 70% of training  15% of validation 15% of tes
X = data[feature].values
y = data[target].values
train_portion = round(X.shape[0] * 0.7)
val_portion = round(X.shape[0] * 0.15)
train_data_y = y[:train_portion].reshape(-1, 1)
val_data_y = y[train_portion:train_portion+val_portion].reshape(-1, 1)
test_data_y = y[train_portion+val_portion:].reshape(-1, 1)
# ensure the portion of dataset
print(f"training: {len(train_data_y)}, val: {len(val_data_y)}, test: {len(test_data_y)}")
training: 299, val: 64, test: 64
In [7]:
# preprocessing
from sklearn.preprocessing import MinMaxScaler


sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(train_data_y)
train_norm = sc.transform(train_data_y)
val_norm = sc.transform(val_data_y)
test_norm = sc.transform(test_data_y)
In [8]:
# convert sequence function
def to_sequence(data, look_back, foresight):
    X, Y = [], []
    for i in range(len(data) - (look_back + foresight)):
        look_back_seq = data[i:(i+look_back), 0]
        foresight_seq = (data[i + (look_back+foresight), 0])
        X.append(look_back_seq)
        Y.append(foresight_seq)
    return np.array(X), np.array(Y)
In [9]:
# setting look back and foresight like the lecture
train_seqX, train_seqY = to_sequence(train_norm, look_back=7, foresight=6)
val_seqX, val_seqY = to_sequence(val_norm, look_back=7, foresight=6)
test_seqX, test_seqY = to_sequence(test_norm, look_back=7, foresight=6)
In [10]:
from keras.models import Sequential
from keras.layers import Dense, Input
feedforward_model = Sequential()
# set input_shape = 7 to align look_back number
# Dense model only have 2D input
feedforward_model.add(Input(shape = (7, )))
feedforward_model.add(Dense(64, activation='relu'))
feedforward_model.add(Dense(32, activation='relu'))
feedforward_model.add(Dense(1, activation='linear'))
feedforward_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
feedforward_model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense (Dense)                   │ (None, 64)             │           512 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 32)             │         2,080 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 1)              │            33 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 2,625 (10.25 KB)
 Trainable params: 2,625 (10.25 KB)
 Non-trainable params: 0 (0.00 B)
In [11]:
from keras.callbacks import EarlyStopping
checkpoint = EarlyStopping(monitor='val_loss', patience=5, mode='auto',  restore_best_weights = True)
callback_list = [checkpoint]
feedforward_network = feedforward_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY),
                                            epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - loss: 0.0340 - mean_absolute_error: 0.0340 - val_loss: 0.0122 - val_mean_absolute_error: 0.0122
Epoch 2/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0154 - mean_absolute_error: 0.0154 - val_loss: 0.0145 - val_mean_absolute_error: 0.0145
Epoch 3/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0219 - mean_absolute_error: 0.0219 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117
Epoch 4/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0181 - mean_absolute_error: 0.0181 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123
Epoch 5/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0137 - mean_absolute_error: 0.0137 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 6/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0179 - mean_absolute_error: 0.0179 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117
Epoch 7/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0151 - mean_absolute_error: 0.0151 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 8/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.0176 - mean_absolute_error: 0.0176 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 9/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0117 - mean_absolute_error: 0.0117 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 10/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0170 - mean_absolute_error: 0.0170 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113
Epoch 11/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0143 - mean_absolute_error: 0.0143 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 12/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0132 - mean_absolute_error: 0.0132 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113
Epoch 13/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0115 - mean_absolute_error: 0.0115 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 14/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0171 - mean_absolute_error: 0.0171 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 15/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0133 - mean_absolute_error: 0.0133 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 16/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0126 - mean_absolute_error: 0.0126 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113
Epoch 17/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0167 - mean_absolute_error: 0.0167 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113
Epoch 18/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0131 - mean_absolute_error: 0.0131 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 19/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0162 - mean_absolute_error: 0.0162 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 20/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0140 - mean_absolute_error: 0.0140 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
Epoch 21/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0116 - mean_absolute_error: 0.0116 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
In [12]:
from sklearn.metrics import mean_absolute_error
test_norm_predict = feedforward_model.predict(test_seqX)
# convert back to unnormalize
test_predict = sc.inverse_transform(test_norm_predict)
testY = sc.inverse_transform(test_seqY.reshape(-1, 1))
test_Mae = mean_absolute_error(testY, test_predict)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step

(a) Report the unnormalized MAE of the test set on your best model.¶

(b) Plot the loss curves for training and validation sets for the best model.¶

In [13]:
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(y = feedforward_network.history['loss'], mode = 'lines', name='Training error'))
fig.add_trace(go.Scatter(y = feedforward_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean Absolute error', title_text=f'Unnormalized MAE = {test_Mae:.3f}')
fig.show()

(c) What are the predicted values of y for March 1st and March 2nd?¶

In [14]:
# get last 7 data due to look back setting to 7
data_norm = sc.transform(y.reshape(-1, 1))
last_7_values = data_norm[-7:].reshape(1, 7)
pred_march1_norm = feedforward_model.predict(last_7_values)
pred_march1_feedforward = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
In [15]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = feedforward_model.predict(updated_last_7_values)
pred_march2_feedforward = sc.inverse_transform(pred_march2_norm)
print(f"Feed forward model prediction for March 1st {pred_march1_feedforward[0][0]:.3f}, prediction for March 2nd {pred_march2_feedforward[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
Feed forward model prediction for March 1st 71.550, prediction for March 2nd 72.606

2. Explore recurrent neural network models for this problem. (10 points)¶

LSTM¶

In [16]:
from keras.layers import LSTM
LSTM_model = Sequential()
LSTM_model.add(Input(shape=(7, 1)))
LSTM_model.add(LSTM(32, dropout = 0.1, recurrent_dropout = 0.1))
LSTM_model.add(Dense(1, activation='linear'))
LSTM_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
LSTM_model.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                     │ (None, 32)             │         4,352 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense)                 │ (None, 1)              │            33 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 4,385 (17.13 KB)
 Trainable params: 4,385 (17.13 KB)
 Non-trainable params: 0 (0.00 B)
In [17]:
checkpoint = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
callback_list = [checkpoint]
LSTM_network = LSTM_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY), 
                              epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 32ms/step - loss: 0.0228 - mean_absolute_error: 0.0228 - val_loss: 0.0131 - val_mean_absolute_error: 0.0131
Epoch 2/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.0196 - mean_absolute_error: 0.0196 - val_loss: 0.0138 - val_mean_absolute_error: 0.0138
Epoch 3/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0182 - mean_absolute_error: 0.0182 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121
Epoch 4/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0136 - mean_absolute_error: 0.0136 - val_loss: 0.0135 - val_mean_absolute_error: 0.0135
Epoch 5/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0157 - mean_absolute_error: 0.0157 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119
Epoch 6/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0159 - mean_absolute_error: 0.0159 - val_loss: 0.0124 - val_mean_absolute_error: 0.0124
Epoch 7/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0129 - mean_absolute_error: 0.0129 - val_loss: 0.0118 - val_mean_absolute_error: 0.0118
Epoch 8/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0150 - mean_absolute_error: 0.0150 - val_loss: 0.0122 - val_mean_absolute_error: 0.0122
Epoch 9/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0131 - mean_absolute_error: 0.0131 - val_loss: 0.0118 - val_mean_absolute_error: 0.0118
Epoch 10/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0127 - mean_absolute_error: 0.0127 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121
Epoch 11/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0148 - mean_absolute_error: 0.0148 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119
Epoch 12/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0131 - mean_absolute_error: 0.0131 - val_loss: 0.0120 - val_mean_absolute_error: 0.0120
Epoch 12: early stopping
Restoring model weights from the end of the best epoch: 7.
In [18]:
test_norm_predict = LSTM_model.predict(test_seqX)
test_predict = sc.inverse_transform(test_norm_predict)
testY = sc.inverse_transform(test_seqY.reshape(-1, 1))
testLSTM_Mae = mean_absolute_error(testY, test_predict)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 76ms/step

(a) Report the unnormalized MAE of the test set on your best model.¶

(b) Plot the loss curves for training and validation sets for the best model.¶

In [19]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=LSTM_network.history['loss'], mode='lines', name='Training error'))
fig.add_trace(go.Scatter(y=LSTM_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean absolute error', title_text = f'Unnormalized MAE = {testLSTM_Mae:.3f}')
fig.show()

(c) What are the predicted values of y for March 1st and March 2nd?¶

In [20]:
data_norm = sc.transform(y.reshape(-1, 1))
last_7_values = data_norm[-7:].reshape(1, 7)
pred_march1_norm = LSTM_model.predict(last_7_values)
pred_march1_LSTM = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
In [21]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = LSTM_model.predict(updated_last_7_values)
pred_march2_LSTM = sc.inverse_transform(pred_march2_norm)
print(f"LSTM model prediction for March 1st {pred_march1_LSTM[0][0]:.3f}, prediction for March 2nd {pred_march2_LSTM[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
LSTM model prediction for March 1st 72.899, prediction for March 2nd 72.956

GRU¶

In [22]:
from keras.layers import GRU
GRU_model = Sequential()
GRU_model.add(Input(shape=(7, 1)))
GRU_model.add(GRU(32, dropout = 0.1, recurrent_dropout = 0.1))
GRU_model.add(Dense(1, activation='linear'))
GRU_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
In [23]:
checkpoint = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
callback_list = [checkpoint]
GRU_model_network = GRU_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY), 
                                  epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 32ms/step - loss: 0.0200 - mean_absolute_error: 0.0200 - val_loss: 0.0148 - val_mean_absolute_error: 0.0148
Epoch 2/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0153 - mean_absolute_error: 0.0153 - val_loss: 0.0125 - val_mean_absolute_error: 0.0125
Epoch 3/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0149 - mean_absolute_error: 0.0149 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117
Epoch 4/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0143 - mean_absolute_error: 0.0143 - val_loss: 0.0118 - val_mean_absolute_error: 0.0118
Epoch 5/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0128 - mean_absolute_error: 0.0128 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 6/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0144 - mean_absolute_error: 0.0144 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 7/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0155 - mean_absolute_error: 0.0155 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117
Epoch 8/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0157 - mean_absolute_error: 0.0157 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 9/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0154 - mean_absolute_error: 0.0154 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 10/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0157 - mean_absolute_error: 0.0157 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 11/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0149 - mean_absolute_error: 0.0149 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 12/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0138 - mean_absolute_error: 0.0138 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 13/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0134 - mean_absolute_error: 0.0134 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 14/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0146 - mean_absolute_error: 0.0146 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 15/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0139 - mean_absolute_error: 0.0139 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115
Epoch 16/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0124 - mean_absolute_error: 0.0124 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116
Epoch 16: early stopping
Restoring model weights from the end of the best epoch: 11.
In [24]:
test_norm_predict = GRU_model.predict(test_seqX)
print(test_norm_predict.shape)
test_predict = sc.inverse_transform(test_norm_predict)
testY = sc.inverse_transform(test_seqY.reshape(-1, 1))
test_GRUMae = mean_absolute_error(testY, test_predict)
WARNING:tensorflow:5 out of the last 9 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x17a560160> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
1/2 ━━━━━━━━━━━━━━━━━━━━ 0s 79ms/stepWARNING:tensorflow:6 out of the last 10 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x17a560160> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 78ms/step
(51, 1)

(a) Report the unnormalized MAE of the test set on your best model.¶

(b) Plot the loss curves for training and validation sets for the best model.¶

In [25]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=GRU_model_network.history['loss'], mode='lines', name='Traning error'))
fig.add_trace(go.Scatter(y=GRU_model_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean absolute error', title_text = f'Unnormalized MAE: {test_GRUMae:.3f}')

(c) What are the predicted values of y for March 1st and March 2nd?¶

In [26]:
data_norm = sc.transform(y.reshape(-1, 1))
last_7_values = data_norm[-7:]
pred_march1_norm = GRU_model.predict(last_7_values)
pred_march1_GRU = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 116ms/step
In [27]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = GRU_model.predict(updated_last_7_values)
pred_march2_GRU = sc.inverse_transform(pred_march2_norm)
print(f"Prediction for March 1st {pred_march1_GRU[0][0]:.3f}, prediction for March 2nd {pred_march2_GRU[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
Prediction for March 1st 66.749, prediction for March 2nd 72.250
In [28]:
print('Compare LSTM and GRU')
print(f'LSTM:\nMAE: {testLSTM_Mae:.3f}\nMarch 1st: {pred_march1_LSTM[0][0]:.3f} March 2nd: {pred_march2_LSTM[0][0]:.3f}')
print(f'GRU:\nMAE: {test_GRUMae:.3f}\nMarch 1st: {pred_march1_GRU[0][0]:.3f} March 2nd: {pred_march2_GRU[0][0]:.3f}')
Compare LSTM and GRU
LSTM:
MAE: 6.723
March 1st: 72.899 March 2nd: 72.956
GRU:
MAE: 6.925
March 1st: 66.749 March 2nd: 72.250

3. Explore 1d convolutional neural network models for this problem. (10 points)¶

In [29]:
from keras.layers import Conv1D,MaxPool1D, Flatten

conv_model = Sequential()
# kernel size = the step of model going throuhg
# pool size = taking max of the number
conv_model.add(Input(shape=(7, 1)))
conv_model.add(Conv1D(filters=32, kernel_size=1, activation='relu'))
conv_model.add(MaxPool1D(pool_size=2))
conv_model.add(Flatten())
conv_model.add(Dense(64, activation='relu'))
conv_model.add(Dense(1, activation='linear'))
conv_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
In [30]:
checkpoint = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
callback_list = [checkpoint]
conv_model_network = conv_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY), 
                                  epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - loss: 0.0206 - mean_absolute_error: 0.0206 - val_loss: 0.0140 - val_mean_absolute_error: 0.0140
Epoch 2/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0156 - mean_absolute_error: 0.0156 - val_loss: 0.0136 - val_mean_absolute_error: 0.0136
Epoch 3/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0143 - mean_absolute_error: 0.0143 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123
Epoch 4/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0135 - mean_absolute_error: 0.0135 - val_loss: 0.0126 - val_mean_absolute_error: 0.0126
Epoch 5/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0140 - mean_absolute_error: 0.0140 - val_loss: 0.0122 - val_mean_absolute_error: 0.0122
Epoch 6/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0180 - mean_absolute_error: 0.0180 - val_loss: 0.0120 - val_mean_absolute_error: 0.0120
Epoch 7/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0119 - mean_absolute_error: 0.0119 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121
Epoch 8/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0129 - mean_absolute_error: 0.0129 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119
Epoch 9/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0130 - mean_absolute_error: 0.0130 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119
Epoch 10/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0170 - mean_absolute_error: 0.0170 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121
Epoch 11/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0151 - mean_absolute_error: 0.0151 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121
Epoch 12/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0120 - mean_absolute_error: 0.0120 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123
Epoch 13/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0147 - mean_absolute_error: 0.0147 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123
Epoch 14/100
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0172 - mean_absolute_error: 0.0172 - val_loss: 0.0120 - val_mean_absolute_error: 0.0120
Epoch 14: early stopping
Restoring model weights from the end of the best epoch: 9.
In [31]:
test_norm_predict = conv_model.predict(test_seqX)
test_predict = sc.inverse_transform(test_norm_predict)
testconv_Mae = mean_absolute_error(testY, test_predict)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step

(a) Report the unnormalized MAE of the test set on your best model.¶

(b) Plot the loss curves for training and validation sets for the best model.¶

In [32]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=conv_model_network.history['loss'], mode='lines', name='Training error'))
fig.add_trace(go.Scatter(y=conv_model_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean absolute error', title_text = f'Unnormalized MAE: {testconv_Mae:.3f}')

(c) What are the predicted values of y for March 1st and March 2nd?¶

In [33]:
data_norm = sc.transform(y.reshape(-1, 1))
# for align with conv model
last_7_values = data_norm[-7:].reshape(1, 7)
pred_march1_norm = conv_model.predict(last_7_values)
pred_march1_conv = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
In [34]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = GRU_model.predict(updated_last_7_values)
pred_march2_conv  = sc.inverse_transform(pred_march2_norm)
print(f"Prediction for March 1st {pred_march1_GRU[0][0]:.3f}, prediction for March 2nd {pred_march2_conv[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
Prediction for March 1st 66.749, prediction for March 2nd 72.813

Conclusion¶

In [35]:
print('Feed forward:')
print(f'MAE: {test_Mae:.3f}\nMarch 1st: {pred_march1_feedforward[0][0]:.3f} March 2nd: {pred_march2_feedforward[0][0]:.3f}\n')
print('Recurrent network:')
print(f'LSTM:\nMAE: {testLSTM_Mae:.3f}\nMarch 1st: {pred_march1_LSTM[0][0]:.3f} March 2nd: {pred_march2_LSTM[0][0]:.3f}\n')
print(f'GRU:\nMAE: {test_GRUMae:.3f}\nMarch 1st: {pred_march1_GRU[0][0]:.3f} March 2nd: {pred_march2_GRU[0][0]:.3f}\n')
print('Convnet 1D:')
print(f'MAE: {testconv_Mae:.3f}\nMarch 1st: {pred_march1_conv[0][0]:.3f} March 2nd: {pred_march2_conv[0][0]:.3f}')
Feed forward:
MAE: 7.568
March 1st: 71.550 March 2nd: 72.606

Recurrent network:
LSTM:
MAE: 6.723
March 1st: 72.899 March 2nd: 72.956

GRU:
MAE: 6.925
March 1st: 66.749 March 2nd: 72.250

Convnet 1D:
MAE: 6.744
March 1st: 71.662 March 2nd: 72.813

Based on those model's result, we can see the LSTM has minimum test error compare the other models